*************************************
* Extract MORG 1983-2019 
*************************************

clear 
set more off
* NOTE: -perm- makes double the default storage type in future Stata sessions too
set type double, perm
tempfile temp

* Prior to 1983 no union data, so the extract starts in 1983.
* Each pass loads one year of MORG data (age 16-65), runs the shared recoding
* programs, stacks the years accumulated so far underneath it, and re-saves
* the running file. After the loop the full stack is left in memory.

local firstyr = 1983

foreach x of num `firstyr'/2019 {
	
	* last two digits of the year, as used in the raw file names (e.g. 83, 07)
	local y=substr(string(`x'),3,4)
	
	use "${raw}/cps/morg`y'.dta" if age>15&age<=65, clear
	
	/* Some recoding files */
	do "${cps}/programs/lfs.do" 
	do "${cps}/programs/educ.do" 
	do "${cps}/programs/student.do" 
	
	* Nothing has been accumulated on the first pass, so append only afterwards.
	* A blanket -capture- would also swallow genuine append errors (for example
	* a string/numeric type clash between years), and the -save- below would
	* then overwrite the running file and silently drop every earlier year.
	if `x'>`firstyr' {
		append using "`temp'"
	}
	save "`temp'", replace
	
}

/*** State fix for 2014 onward (logic lives in statefix.do) ***/
do "${cps}/programs/statefix.do"

* remove the P27L value label definition from the dataset
label drop P27L

/*** Consistent class-of-worker variable (built in class_wkr.do) ***/
do "${cps}/programs/class_wkr.do"

/*** Female indicator ***/
* computed as sex minus one; assumes sex is coded 1/2 -- TODO confirm
gen female = sex - 1
drop sex

/*** Hispanic indicator ***/
* The ethnic codes counted as Hispanic depend on the survey period:
*   1979-2002 and 2014-2020 : codes 1-7
*   2003-2013               : codes 1-5
gen hispanic = 0
replace hispanic = 1 if (inrange(year,1979,2002) | inrange(year,2014,2020)) & inrange(ethnic,1,7)
replace hispanic = 1 if inrange(year,2003,2013) & inrange(ethnic,1,5)

* Through 2002, code 9 or a missing ethnic value leaves the indicator undefined
replace hispanic = . if inrange(year,1979,2002) & (ethnic==9 | ethnic==.)

drop ethnic


/*** Create consistent race variable ***/
do "${cps}/programs/race_bridge.do"

* Collapse to three groups (1 White, 2 Black, 3 Other).
* 1979-2002 is coded from race; 2003 onward is coded from ONERACE.
generate race_new=.
replace race_new=1 if (year>=1979 & year<=2002) & race==1
replace race_new=2 if (year>=1979 & year<=2002) & race==2
replace race_new=3 if (year>=1979 & year<=2002) & race>=3 & race~=.
replace race_new=1 if (year>=2003 & year<=2020) & ONERACE==4
replace race_new=2 if (year>=2003 & year<=2020) & ONERACE==3
* The ONERACE alternatives must be parenthesised: & binds tighter than |, so
* without the parentheses ONERACE==2 would be set to "Other" in every year,
* overriding the 1979-2002 coding above.
replace race_new=3 if (year>=2003 & year<=2020) & (ONERACE==1 | ONERACE==2)
drop race ONERACE
rename race_new race
label define race 1 "White" 2 "Black" 3 "Other"
label values race race
*drop if race==.

/*** Black indicator ***/
* 1 when race==2, 0 for any other race code, missing when race is missing (.)
gen black = cond(race==., ., race==2)


/*** Married indicator ***/
/*** Marital status was only asked of people aged 15 or older; the current sample is restricted to ages 16 and up ***/
* 1 for marital codes 1 and 2, 0 for other codes, missing when marital is missing (.)
gen married = cond(marital==., ., marital==1 | marital==2)
drop marital


************************************
* Years of schooling 
***********************************
do "${cps}/programs/year_sch.do"

/* Potential experience */
* Smaller of (age - schooling - 6) and (age - 16), floored at zero.
* NOTE(review): max()/min() skip missing arguments, so a missing yearsch_2
* falls back to age-16 instead of producing a missing value -- confirm intended.
gen exp = max(0, min(age - 16, age - yearsch_2 - 6))
gen pexp = exp
gen pexp2 = pexp^2

/*** Usual weekly hours, falling back to hours worked last week when usual hours are unavailable ***/
* non-positive usual hours are treated as missing
replace uhourse = . if uhourse <= 0

* hours_flag marks observations whose hours come from hourslw
gen hours_flag = uhourse==. & (hourslw>0 & hourslw~=.)
gen hours = cond(hours_flag==1, hourslw, uhourse)
*drop hourslwa hourslw uhours uhourse

/*** Rebuild the earnings allocation flags ***/
/*** 1989-1993: only about 1/4 of allocated earnings are identified by the BLS allocation flags ***/
/*** January 1994 - August 1995: the BLS earnings allocation flags are missing ***/

* period in which allocation is inferred by comparing unedited and edited earnings
local cmpyears "inrange(year,1989,1993)"
* observations for which no allocation information exists
local noflags  "year==1994 | (year==1995 & inrange(intmonth,1,8))"

* hourly-paid workers (paidhre==1)
gen flag_hr = 0
replace flag_hr = 1 if paidhre==1 & I25c>0 & I25c~=.
replace flag_hr = (earnhr~=earnhre) if paidhre==1 & `cmpyears'
replace flag_hr = . if `noflags'

* workers not paid by the hour (paidhre==2)
gen flag_wk = 0
replace flag_wk = 1 if paidhre==2 & I25d>0 & I25d~=.
replace flag_wk = (uearnwk~=earnwke) if paidhre==2 & `cmpyears'
replace flag_wk = . if `noflags'

drop I25a I25b I25c I25d


/*** Single allocation flag: set when either the hourly or the weekly flag is set ***/
gen alloc = (flag_hr==1 | flag_wk==1)
replace alloc = . if `noflags'
drop flag_hr flag_wk


/*** Fix topcoded weekly earnings ***/
/*** Multiply topcoded weekly earnings by 1.4 ***/

* zero weekly earnings are treated as missing
replace earnwke=. if earnwke==0

* Top-code values checked by period: 999 (1979-1988), 1923 (1989-1997),
* 2884 or 2884.61 (1998-2020).
gen earnwk_1=earnwke
replace earnwk_1=earnwk_1*1.4 if earnwke== 999 & (year>=1979 & year<=1988)
replace earnwk_1=earnwk_1*1.4 if earnwke==1923 & (year>=1989 & year<=1997)
* Two fixes on the last period:
*  - the alternatives are grouped with the year restriction; & binds tighter
*    than |, so ungrouped, earnwke==2884 was scaled in every year;
*  - float(2884.61) is added because 2884.61 has no exact binary form and a
*    float-stored earnwke never equals the double literal 2884.61.
replace earnwk_1=earnwk_1*1.4 if inlist(earnwke, 2884, 2884.61, float(2884.61)) & (year>=1998 & year<=2020)

* topcode marks observations whose weekly earnings were scaled above
gen topcode=0
replace topcode=1 if earnwk_1~=earnwke

* drop uearnwk uearnwke earnwke


/*** Hourly wage on a consistent basis ***/
* earnhre: zero becomes missing, everything else is divided by 100
replace earnhre = cond(earnhre==0, ., earnhre/100)

* paid by the hour: reported hourly rate; otherwise: adjusted weekly earnings / hours
gen wage = earnhre if paidhre==1
replace wage = earnwk_1/hours if paidhre==2

* Fix for 86-88 data to use higher topcodes (unedited weekly earnings)
* NOTE(review): earnwke>999 is strict; if edited earnings are capped at 999 in
* these years this only matches missing earnwke -- confirm whether >=999 was meant.
local y8688 "inrange(year,1986,1988)"
replace wage = uearnwk/hours if paidhre==2 & `y8688' & earnwke>999 & uearnwk>999 & uearnwk!=.
replace wage = uearnwk/hours*1.4 if paidhre==2 & `y8688' & uearnwk==1999

// Notes on wages 
	* 1) Thomas uses unedited weekly earnings for 86-88 because of top codes. There are
	*    no allocated wages for this variable. 
	* 2) Autor does this too, but only for those that are topcoded.
	* 3) When I do this, it does not seem to have any effect.

/*** Keep the topcode indicator only for hourly wages derived from topcoded weekly earnings ***/
replace topcode = 0 if paidhre==1


******** Bring in price level data **************
do "${cps}/programs/price_indices.do"

// ------------- Wage Restrictions -----------------
* FT: 35 or more (non-missing) hours
gen FT = (hours>=35) & (hours<.)
* earnsamp: class 1 or 2, lfstat 1, and positive non-missing hours
gen earnsamp = inlist(class,1,2) & lfstat==1 & hours>0 & hours<.

* trimming based on Lemieux 2006: keep wage only when wage*cpi79 lies in [1,100]
* and the observation is in the earnings sample; rawwage keeps the untrimmed value
gen rawwage = wage
replace wage = . if !inrange(wage*cpi79, 1, 100)
replace wage = . if earnsamp!=1

* untrimmed wage scaled by cpi, and its log
gen rhrw_cpi = rawwage*cpi
gen lnrhrw_cpi = ln(rhrw_cpi)

* trimmed hourly wage scaled by each price index
foreach p in cpi ppi pce {
	gen hrw_`p' = wage*`p'
}

* weekly counterpart: hourly value times hours
foreach p in cpi ppi pce {
	gen wkw_`p' = hrw_`p'*hours
}

* logs of hourly and weekly values; lnftw_* is defined only when FT==1
foreach p in cpi pce ppi {
	gen lnhrw_`p' = ln(hrw_`p')
	gen lnwkw_`p' = ln(wkw_`p')
	gen lnftw_`p' = ln(wkw_`p') if FT==1
}
	
// ----------------- Weights --------------------
gen wgt = earnwt/12
gen fwt = weight/3
* wgt scaled by hours relative to a 35-hour week
gen lswt = wgt*(hours/35)

/*** Create union variables ***/
/*** The union membership/coverage information is only available from 1983 ***/

* union_mem: 1 if unionmme==1, 0 for other codes, missing if unionmme is missing
gen union_mem=0
replace union_mem=1 if unionmme==1
replace union_mem=. if unionmme==.

* union_cov starts from membership and is then updated from unioncov
* (1 = covered, 2 = not covered)
gen union_cov=union_mem
replace union_cov=1 if unioncov==1
replace union_cov=0 if union_cov~=1 & unioncov==2
* Coverage is left undefined when it is neither established nor reported.
replace union_cov=. if union_cov~=1 & unioncov==. & (year>=1983 & year<=1993) & unionmme==2
* The upper bound was 2015 although the extract runs through 2019, so in
* 2016-2019 a missing unioncov was recorded as "not covered". Extended to
* 2020 to match the year ranges used elsewhere in this file.
replace union_cov=. if union_cov~=1 & unioncov==. & (year>=1994 & year<=2020)

drop unionmm unionmme unioncov


/*** Census division and region from the state code ***/

* division is identified by the range the state code falls in;
* codes outside every range (and missing codes) keep division 0
gen division = 0
replace division = 11 if inrange(state,11,16)
replace division = 12 if inrange(state,21,23)
replace division = 21 if inrange(state,31,35)
replace division = 22 if inrange(state,41,47)
replace division = 31 if inrange(state,51,59)
replace division = 32 if inrange(state,61,64)
replace division = 33 if inrange(state,71,74)
replace division = 41 if inrange(state,81,88)
replace division = 42 if inrange(state,91,95)

label define division ///
	11 "New England" ///
	12 "Middle Atlantic" ///
	21 "East North Central" ///
	22 "West North Central" ///
	31 "South Atlantic" ///
	32 "East South Central" ///
	33 "West South Central" ///
	41 "Mountain" ///
	42 "Pacific"
label values division division

* region is the tens digit of the division code
gen region = floor(division/10)
label define region 1 "Northeast" 2 "Midwest" 3 "South" 4 "West" 
label values region region



// --------------------------------------
//			Occupations and Industries
// --------------------------------------
// Builds a single industry description variable (ind_des) from period-specific
// industry codes. The statement order matters: each crosswalk do-file is run
// right after the variable it appears to read has been created, and its output
// ind_des is renamed before the next crosswalk runs.

*	1970s
* (disabled) 1970-basis codes are not used in this extract
*replace ind70=. if year==1983
*do "${cps}/programs/CW_ind70_ind80.do"

*	1980s
* ind_80 holds the raw ind80 code for 1979-1991 only
gen ind_80 = ind80 if year>=1979&year<=1991
* presumably maps ind_80 to ind_des (ind_des must exist for the rename below) -- confirm in the do-file
do "${cps}/programs/ind_80_des_crosswalk.do"

rename ind_des ind_des80

*	1990s
* ind_90 is also taken from the raw ind80 variable, for 1992-2002
* NOTE(review): assumes ind80 carries the 1990-basis codes in these years -- confirm
gen ind_90 = ind80 if year>=1992&year<=2002
*do $dir/clean/ind_90_des_crosswalk.do


*	2000s
* ind_02 holds the raw ind02 code from 2003 on
gen ind_02=ind02 if year>=2003
* ind90 is created empty here; presumably filled by the crosswalk that follows -- confirm
g ind90=.
do "${cps}/programs/CW_ind90_ind_02.do"
* carry the crosswalked code into ind_90 for 2003-2019
replace ind_90 = ind90 if year>=2003&year<=2019
* presumably maps ind_90 to ind_des, as for the 1980s codes -- confirm in the do-file
do "${cps}/programs/ind_90_des_crosswalk.do"
rename ind_des ind_des90

* combine the period-specific results: 1979-1991 from ind_des80, 1992-2019 from ind_des90
gen ind_des = ind_des80 if year>=1979&year<=1991
replace ind_des = ind_des90 if year>=1992&year<=2019
drop ind_des90 ind_des80

do "${cps}/programs/ind_des_major_recodes.do"
do "${cps}/programs/ind_des_labels.do"


// small fix for nber code on dind02
* recodes the dind02 value 6790 to 32
recode dind02 (6790=32)

* Drop observations with missing labor force status, stamp the dataset with
* the extraction date, and write the final extract sorted by year.
drop if lfstat==.
lab data "extracted on `c(current_date)'"
sort year
* The path is quoted, like every other path in this file, so that a ${wd}
* containing spaces does not break the command.
save "${wd}/cps/CPSextract.dta", replace
 

